home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-01
/
ohlfind.zip
/
CODE.C
< prev
next >
Wrap
C/C++ Source or Header
|
1990-05-17
|
3KB
|
152 lines
/* code -- code filenames for fast-find
Compress a sorted list.
Works with 'find' to encode and decode a filename database.
Usage:
bigram < list > bigrams
process-bigrams > common_bigrams
code common_bigrams < list > squeezed_list
Uses 'front compression' (see ";login:", March 1983, p. 8).
Output format is, per line, an offset differential count byte
followed by a partially bigram-encoded ASCII residue.
The codes are:
0-28 likeliest differential counts + offset to make nonnegative
30 escape code for out-of-range count to follow in next word
128-255 bigram codes (128 most common, as determined by 'updatedb')
32-127 single character (printable) ASCII residue
Author: James A. Woods (jaw@riacs.edu)
Modified by David MacKenzie (djm@ai.mit.edu)
Public domain. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/param.h>
#ifndef MAXPATHLEN
#define MAXPATHLEN 1024
#endif
/* Switch code. */
#define RESET 30
char path[MAXPATHLEN];
char oldpath[MAXPATHLEN] = " ";
char bigrams[257] = {0};
void
main (argc, argv)
int argc;
char *argv[];
{
int count, oldcount, diffcount;
int j, code;
char bigram[3];
FILE *fp;
oldcount = 0;
bigram[2] = '\0';
if (argc != 2)
{
fprintf (stderr, "Usage: %s common_bigrams < list > coded_list\n",
argv[0]);
exit (2);
}
fp = fopen (argv[1], "r");
if (fp == NULL)
{
fprintf (stderr, "%s: ", argv[0]);
perror (argv[1]);
exit (1);
}
fgets (bigrams, 257, fp);
fwrite (bigrams, 1, 256, stdout);
while (fgets (path, sizeof path, stdin) != NULL)
{
path[strlen (path) - 1] = '\0'; /* Remove newline. */
/* Squelch unprintable chars so as not to botch decoding. */
for (j = 0; path[j] != '\0'; j++)
{
path[j] &= 0177;
if (path[j] < 040 || path[j] == 0177)
path[j] = '?';
}
count = prefix_length (oldpath, path);
diffcount = count - oldcount;
if (diffcount < -14 || diffcount > 14)
{
putc (RESET, stdout);
putw (diffcount + 14, stdout);
}
else
putc (diffcount + 14, stdout);
for (j = count; path[j] != '\0'; j += 2)
{
if (path[j + 1] == '\0')
{
putchar (path[j]);
break;
}
bigram[0] = path[j];
bigram[1] = path[j + 1];
/* Linear search for specific bigram in string table. */
code = strindex (bigrams, bigram);
if (code % 2 == 0)
putchar ((code / 2) | 0200);
else
fputs (bigram, stdout);
}
strcpy (oldpath, path);
oldcount = count;
}
exit (0);
}
/* Return location of PATTERN in STRING or -1. */
int
strindex (string, pattern)
char *string, *pattern;
{
register char *s, *p, *q;
for (s = string; *s != '\0'; s++)
if (*s == *pattern)
{
/* Fast first char check. */
for (p = pattern + 1, q = s + 1; *p != '\0'; p++, q++)
if (*q != *p)
break;
if (*p == '\0')
return q - strlen (pattern) - string;
}
return -1;
}
/* Return length of longest common prefix of strings S1 and S2. */
int
prefix_length (s1, s2)
char *s1, *s2;
{
register char *start;
for (start = s1; *s1 == *s2; s1++, s2++)
if (*s1 == '\0')
break;
return s1 - start;
}